Identifying Sold On-Off Market Properties

In [1]:
import pandas as pd
import numpy as np
import warnings
warnings.filterwarnings("ignore")
import plotly.express as px
import plotly.graph_objects as go
import datetime

Table of Contents¶

  • A. Project Understanding and Problem Definition

  • B. Data Understanding

    • 1. Loading Data
    • 2. Cleaning Data functions
  • C. Filters' Functions

    • 1. Comparing Address - Owner's Appearance - Profit Percent Filter
    • 2. Flipping Filter
    • 3. On - Off Market Function
  • D. Plots

    • 1. Chicago
    • 2. Dallas
    • 3. Denver

A. Project Understanding and Problem Definition ¶

This project firstly is to create filters applying to each row from assessor records:

  • Filtering out invested properties, which includes the following factors:

    • Comparing main addresses and mailing addresses of properties. Keeping only the rows whose main address differs from the mailing address
    • Counting the frequency of the Owner's Name. Choosing those that appeared more than n times (n - here is threshold that we can change)
    • Calculating profit_percentage and keep those rows in the range of (10, 1000)
  • Filtering out Flipping properties, using conditions:

    • years between sales is less than or equal to 2
    • 25% ≤ profit % < 1000%

After obtaining this filtered data, we check whether a sold property was On or Off market, using:

  • Property ID: if it matches a Property ID in the MLS data or the 1st American listing data, the property was sold On market; otherwise it is considered an Off-market deal.

B. Data Understanding ¶

1. Loading Data ¶

In [2]:
#Assessor record: 1st American: On & OFF
#Only read the columns from the dataset that will be used for this analysis
# NOTE(review): hardcoded absolute local path — consider reading from a
# configurable DATA_DIR so the notebook runs on other machines.
assessor_records = pd.read_csv(
    '/Users/ngothixuan/Desktop/On-Off-Project1/Data/ds_assessor_records.csv',
                usecols = ["propertyid" , "situscity", 
               "situsfullstreetaddress", "mailingfullstreetaddress", 
               'ownername1full',
                'currentsalesprice','prevsalesprice',
                'currentsalerecordingdate', 'prevsalerecordingdate',
                'situscensustract', 'situscensusblock', 'situslatitude','situslongitude', 'yearbuilt',
                "storiesnbrcode"
              ])
In [3]:
#1st American: listing data, ON market
# NOTE(review): hardcoded absolute local path — consider a configurable DATA_DIR
american_list = pd.read_csv("/Users/ngothixuan/Desktop/On-Off-Project1/Data/ds_listings.csv", 
                             usecols =["propertyid","situscity",
                                       'situsfullstreetaddress',
                                       "propertytype",
                                       "status","solddate"])
In [4]:
#MLS listing data, one frame per city, all with the same column subset
# NOTE(review): hardcoded absolute local paths — consider a configurable DATA_DIR
chicago_list = pd.read_csv("/Users/ngothixuan/Desktop/On-Off-Project1/Data/red_chicago_property_listings.csv",
                           usecols =["county_data_id","city",
                                     "property_key", 'property_type','unit_count',
                                     "listed_on", "status", "sold_on"] )
dallas_list = pd.read_csv("/Users/ngothixuan/Desktop/On-Off-Project1/Data/red_dallas_property_listings.csv",
                            usecols =["county_data_id", "city",
                                      "property_key", 'property_type','unit_count',
                                      "listed_on", "status", "sold_on"])
denver_list = pd.read_csv("/Users/ngothixuan/Desktop/On-Off-Project1/Data/red_denver_property_listings.csv",
                            usecols =["county_data_id","city",
                                      "property_key", 'property_type','unit_count',
                                      "listed_on", "status", "sold_on"])

2. Cleaning Data functions ¶

In [5]:
def clean_listing_data(df, id_colname, city_colname, city_name):
    ''' ----- Clean an MLS or 1st American listing dataframe ------
    INPUT: 
        - df: DataFrame, for example: chicago_list / american_list
        - id_colname: the ID column name, such as "county_data_id" for MLSes,
          "propertyid" for the 1st American listing data
        - city_colname: name of the column that holds the city
        - city_name: city to keep, as a lowercase string, e.g. 'denver'
    OUTPUT: 
        - df_one_city: DataFrame restricted to rows with a non-missing, positive
          integer ID, status == "sold" (case-insensitive), a non-missing and
          non-numeric city value, and a city matching city_name
    '''
    # Drop rows with a missing ID and work on an explicit copy so the
    # assignments below cannot hit SettingWithCopyWarning on a view of df.
    df_clean = df[df[id_colname].notna()].copy()

    # BUG FIX: the original `.loc[:, col] = ...astype(np.int64)` assigned into an
    # existing float column, which preserves the float dtype — the advertised
    # int64 conversion never happened. Direct column assignment fixes that.
    df_clean[id_colname] = df_clean[id_colname].astype(np.int64)

    # keep only positive IDs
    df_clean = df_clean[df_clean[id_colname] > 0]

    # keep only sold properties (status compared case-insensitively)
    df_clean = df_clean[df_clean['status'].str.lower() == "sold"]

    # clean the city column: drop missing values and purely-numeric junk values
    df_clean = df_clean[df_clean[city_colname].notna()]
    df_clean = df_clean[df_clean[city_colname].str.isnumeric() != True].copy()
    df_clean[city_colname] = df_clean[city_colname].str.lower()  # normalize case

    # restrict to the requested city
    df_one_city = df_clean[df_clean[city_colname] == city_name]

    return df_one_city
In [6]:
def clean_assessor_record(df, city_name):
    ''' ----- Clean the assessor-records dataframe ------
    INPUT: 
        - df: DataFrame, for example: assessor_records
        - city_name: city to keep, as a lowercase string, e.g. 'denver'
    OUTPUT: 
        - df_one_city: DataFrame restricted to rows with a non-missing, positive
          integer propertyid, at least one recorded sale date
          ("currentsalerecordingdate" or "prevsalerecordingdate"), a non-missing
          and non-numeric city value, and a city matching city_name
    '''
    # Drop rows with a missing propertyid; copy so later assignments are safe
    df_clean = df[df["propertyid"].notna()].copy()

    # BUG FIX: the original `.loc[:, "propertyid"] = ...astype(np.int64)` assigned
    # into a float column, which keeps the float dtype — the advertised int64
    # conversion never happened. Direct column assignment fixes that.
    df_clean["propertyid"] = df_clean["propertyid"].astype(np.int64)

    # keep only positive IDs
    df_clean = df_clean[df_clean["propertyid"] > 0]

    # "sold" = at least one of the two sale dates is recorded
    df_clean = df_clean.dropna(subset=["currentsalerecordingdate", "prevsalerecordingdate"], how="all")

    # clean the city column: drop missing values and purely-numeric junk values
    df_clean = df_clean[df_clean['situscity'].notna()]
    df_clean = df_clean[df_clean['situscity'].str.isnumeric() != True].copy()
    df_clean['situscity'] = df_clean['situscity'].str.lower()  # normalize case

    # restrict to the requested city
    df_one_city = df_clean[df_clean['situscity'] == city_name]

    return df_one_city

C. Filters' Functions ¶

1. Invested Properties ¶

Comparing Address - Count Owner's Appearance - Profit Percent Filter

In [7]:
def invested_properties(df):
    """------ Identify invested properties in assessor records -------
    Pipeline: compare situs vs. mailing addresses, count owner appearances
    (threshold currently 0, i.e. keep all), then require positive profit with
    10% <= profit% < 1000%.
    INPUT: 
        - df: assessor-records DataFrame, e.g. assessor_records
    OUTPUT: 
        - DataFrame of rows whose (lowercased) situs address does not appear among
          the mailing addresses, whose owner appears more than 0 times, with
          profit > 0 and profit_percentage in [10, 1000); adds columns
          'profit' and 'profit_percentage'
    """
    # Only these columns are needed for this filter
    wanted = ['propertyid', 'situsfullstreetaddress', 'situscity',
              'ownername1full', 'mailingfullstreetaddress', 'yearbuilt',
              'storiesnbrcode', 'currentsalerecordingdate', 'currentsalesprice',
              'prevsalerecordingdate', 'prevsalesprice']

    records = (df[wanted]
               .drop_duplicates(keep='first')
               .dropna(subset=['situsfullstreetaddress', 'mailingfullstreetaddress',
                               'ownername1full', 'currentsalesprice', 'prevsalesprice'],
                       how='any'))

    # Flag rows whose situs address (lowercased) appears among the mailing addresses
    situs_lower = records['situsfullstreetaddress'].str.lower()
    mailing_lower = records['mailingfullstreetaddress'].str.lower()
    matched_idx = records[situs_lower.isin(mailing_lower)].index.values

    # Keep only rows whose addresses differ
    candidates = records.drop(index=matched_idx)

    # Owners that appear more than the threshold number of times (threshold = 0,
    # i.e. currently keeps everything — adjust the lambda to change it)
    frequent_owners = candidates.groupby('ownername1full', as_index=False).filter(lambda g: len(g) > 0)

    # Positive profit, with 10% <= profit% < 1000%
    frequent_owners['profit'] = frequent_owners.currentsalesprice - frequent_owners.prevsalesprice
    frequent_owners = frequent_owners[frequent_owners.profit > 0]
    frequent_owners['profit_percentage'] = round(
        (frequent_owners.profit / frequent_owners.prevsalesprice) * 100, 2)
    return frequent_owners[(frequent_owners.profit_percentage >= 10)
                           & (frequent_owners.profit_percentage < 1000)]

2. Flipping Filter ¶

In [8]:
def identify_flip(df):
    """ --- Identify flip properties: resold within 2 years with 25% <= profit% < 1000% ---
    INPUT: 
        - df: assessor records dataframe
    OUTPUT: 
        - flip: a dataframe of potential flip properties, with added columns
          currentsalerecordingdateCOPY / prevsalerecordingdateCOPY (sale years),
          time_between_sale (years), profit, and profit_percentage
    """

    # Only these columns are needed for this analysis
    selected_cols = ['propertyid', 'situsfullstreetaddress', 'situscity',
       'ownername1full', 'mailingfullstreetaddress', 'yearbuilt',
       'storiesnbrcode', 'currentsalerecordingdate', 'currentsalesprice',
       'prevsalerecordingdate', 'prevsalesprice']

    # BUG FIX: the original re-ran drop_duplicates on the full `df` instead of
    # the column subset, silently discarding the column selection above.
    df_flip = df[selected_cols].drop_duplicates(keep='first')
    df_flip = df_flip.dropna(subset=['yearbuilt', 'currentsalesprice',
                                     'prevsalesprice', 'currentsalerecordingdate',
                                     'prevsalerecordingdate'], how="any").copy()

    # Sale dates are stored as YYYYMMDD integers; parse once and keep the year.
    # (The original parsed, reformatted with strftime('%Y'), then re-parsed to
    # numeric — .dt.year does the same in one step.) Unparseable dates become
    # NaT -> NaN year and fall out of the <= 2 filter below.
    df_flip['currentsalerecordingdateCOPY'] = pd.to_datetime(
        df_flip['currentsalerecordingdate'].astype(str), format='%Y%m%d', errors='coerce').dt.year
    df_flip['prevsalerecordingdateCOPY'] = pd.to_datetime(
        df_flip['prevsalerecordingdate'].astype(str), format='%Y%m%d', errors='coerce').dt.year

    # Whole years between the previous and the current sale
    df_flip['time_between_sale'] = (df_flip.currentsalerecordingdateCOPY
                                    - df_flip.prevsalerecordingdateCOPY)

    # Keep only profitable sales
    df_flip['profit'] = df_flip.currentsalesprice - df_flip.prevsalesprice
    df_flip = df_flip[df_flip.profit > 0].copy()
    df_flip['profit_percentage'] = round((df_flip.profit / df_flip.prevsalesprice) * 100, 2)

    # Flip = resold within 2 years at a 25%-1000% profit
    flip = df_flip[df_flip.time_between_sale <= 2]
    flip = flip[(flip['profit_percentage'] >= 25) & (flip['profit_percentage'] < 1000)]
    return flip

3. On - Off Market Function ¶

In [9]:
def Compare_2_dataframe(df1, df2, df1_id, df2_id):
    """
    ------- Compare two cleaned/filtered dataframes by their ID columns --------
    Input:
        - df1: dataframe with no missing IDs, e.g. data from an MLS listing
        - df2: dataframe with no missing IDs to compare with df1, e.g. assessor records
        - df1_id: ID column of df1, e.g. the county_data_id column
        - df2_id: ID column of df2, e.g. the propertyid column
    Output: 
        - match_df1_df2: rows whose ID exists in both df1 and df2
        - only_df1: rows that exist only in df1
        - only_df2: rows that exist only in df2
    """
    # Outer merge with indicator=True adds a '_merge' column telling us,
    # for each row, which side(s) it came from.
    merged = df1.merge(df2, left_on=df1_id, right_on=df2_id,
                       how="outer", indicator=True)

    # Split by merge provenance
    in_both = merged[merged['_merge'] == "both"]
    left_only = merged[merged['_merge'] == "left_only"]
    right_only = merged[merged['_merge'] == "right_only"]

    return in_both, left_only, right_only
In [10]:
#check old
# NOTE(review): this is an older draft of On_Off_Market. It is immediately
# shadowed by the re-definition in the next cell (In [11]), which is the
# version actually executed below — this one is dead code kept for reference.
# It differs from the final version by also carrying 'ownername1full' through
# the off-market frames. NOTE: DataFrame.append used here was removed in
# pandas 2.0 (use pd.concat) — confirm the pinned pandas version.
def On_Off_Market(city_name, mls, assessor_df, firstamerican_list):
   
    cleaned_assessor = clean_assessor_record(assessor_df, city_name)
    cleaned_mls = clean_listing_data(mls, "county_data_id", "city", city_name) 
    cleaned_amricanlist = clean_listing_data(firstamerican_list,"propertyid","situscity", city_name) 
    
    #match1: assessor vs. mls 
    assessor_mls = Compare_2_dataframe(cleaned_mls, cleaned_assessor, 
                                      'county_data_id','propertyid')
    

    #take matched rows => On market values
    on_matched_mls = assessor_mls[0][["county_data_id","city",
                                     "property_key", "status", "sold_on", ]] #only want these columns
    on_MLS = on_matched_mls.drop_duplicates(subset=["county_data_id", "property_key",
                                                    "sold_on"], keep='first').sort_values("county_data_id")#drop duplicated rows after matching
    
    #take "right_only" rows from assessor record 
    not_matched_assessor = assessor_mls[2]
    not_matched_assessor_subset = not_matched_assessor[["propertyid" , "situscity", 'ownername1full',
                                           "situsfullstreetaddress","currentsalerecordingdate",
                                          "prevsalerecordingdate", "property_key"]] #select original columns belongs to assessor record
   
    #------------------------ON First American Listing-----------------------------------------------
    #match2: american listing Vs. not_matched_assessor 
    
    assessor_americanlist = Compare_2_dataframe(cleaned_amricanlist, not_matched_assessor_subset , 
                                      'propertyid','propertyid')
    # take matched rows => On market values
    on_matched_americanlist = assessor_americanlist[0][["propertyid","situscity_y", 
                                       'situsfullstreetaddress_y',
                                        "status","solddate"]] #keep original columns of first American listing
    on_americanlist = on_matched_americanlist.drop_duplicates(subset=[
        "propertyid","solddate"], keep='first').sort_values("propertyid")
  
    
    #-------------------ON market that assessor record matched MLS and American listing ----
    on_americanlist.columns = on_MLS.columns.to_list()
    on_combine_MLS_American = on_MLS.append(on_americanlist)
    
    #drop those duplicate of property_key and sold_on (count month and year only) 
    on_combine_MLS_American.sold_on =  pd.to_datetime(on_combine_MLS_American.sold_on, errors='coerce').dt.strftime('%Y-%m')
    on_MLS_American = on_combine_MLS_American.drop_duplicates(["property_key","sold_on"]).sort_values("county_data_id")
    
    
    #------------------------OFF market in Assessor Records-------------------------------------------
    #take not matched from match2, the left => Off market values (never been listed)
    
    off_market = assessor_americanlist[2][["propertyid" , 'ownername1full','situscity_y', 'situsfullstreetaddress_y',
                                          "currentsalerecordingdate", "prevsalerecordingdate"]]
    
    off_market.columns = ["propertyid" ,'ownername1full',  "situscity", "situsfullstreetaddress",
                          "currentsalerecordingdate", "prevsalerecordingdate"]
        #off current sold
    off_current = off_market[["propertyid",'ownername1full',
                                           "situscity", 
                                           "situsfullstreetaddress",
                                           "currentsalerecordingdate"]]
        #off previous sold
    off_prev = off_market[["propertyid",'ownername1full',
                                           "situscity", 
                                           "situsfullstreetaddress",
                                           "prevsalerecordingdate"]]
    
        #rename and combine them together => Off or previous and current sold
    off_current.rename(columns={'currentsalerecordingdate': 'solddate'}, inplace=True)
    off_prev.rename(columns={'prevsalerecordingdate': 'solddate'}, inplace=True)

    off_current_and_prev = off_current.append(off_prev).dropna().sort_values("propertyid")
    off_current_and_prev['solddate'] = pd.to_datetime(off_current_and_prev['solddate'].astype(str), format='%Y%m%d')
    off_current_and_prev['solddate'] = pd.to_datetime(off_current_and_prev['solddate'], errors='coerce').dt.strftime('%Y-%m')

    return  on_MLS,  on_americanlist, on_MLS_American , off_market, off_current_and_prev
In [11]:
def On_Off_Market(city_name, mls, assessor_df, firstamerican_list):
    """--- Check whether a sold property was listed On MLS / 1st American Listing or Off market ---
    INPUT:
        - city_name: name of a city as a lowercase string, as in the other functions
        - mls: MLS dataset, e.g. chicago_list
        - assessor_df: assessor records data
        - firstamerican_list: first american listing data
    OUTPUT: 
        - on_MLS: assessor rows that matched an MLS listing
        - on_americanlist: rows that did not match MLS but matched 1st American listing
        - on_MLS_American: on_MLS and on_americanlist combined, deduplicated by sale month
        - off_market: rows that matched neither MLS nor 1st American listing
        - off_current_and_prev: off-market rows expanded so both the previous and
          the current sale count as separate off-market deals
    """
    cleaned_assessor = clean_assessor_record(assessor_df, city_name)
    cleaned_mls = clean_listing_data(mls, "county_data_id", "city", city_name)
    cleaned_americanlist = clean_listing_data(firstamerican_list, "propertyid", "situscity", city_name)

    # match 1: MLS vs. assessor
    assessor_mls = Compare_2_dataframe(cleaned_mls, cleaned_assessor,
                                       'county_data_id', 'propertyid')

    #------------------------ON MLS-----------------------------------------------
    # matched rows => On-market deals
    on_matched_mls = assessor_mls[0][["county_data_id", "city",
                                      "property_key", "status", "sold_on"]]  # only these columns needed
    on_MLS = on_matched_mls.drop_duplicates(subset=["county_data_id", "property_key", "sold_on"],
                                            keep='first').sort_values("county_data_id")

    # "right_only" rows = assessor rows that did not match any MLS listing
    not_matched_assessor = assessor_mls[2]
    not_matched_assessor_subset = not_matched_assessor[[
        "propertyid", "situscity",
        "situsfullstreetaddress", "currentsalerecordingdate",
        "prevsalerecordingdate", "property_key"]]  # original assessor-record columns

    #------------------------ON First American Listing----------------------------
    # match 2: 1st American listing vs. rows not matched by MLS
    assessor_americanlist = Compare_2_dataframe(cleaned_americanlist, not_matched_assessor_subset,
                                                'propertyid', 'propertyid')
    # matched rows => On-market deals (keep the 1st American listing columns;
    # the _y suffix marks the right-hand side of the merge)
    on_matched_americanlist = assessor_americanlist[0][["propertyid", "situscity_y",
                                                        'situsfullstreetaddress_y',
                                                        "status", "solddate"]]
    on_americanlist = on_matched_americanlist.drop_duplicates(
        subset=["propertyid", "solddate"], keep='first').sort_values("propertyid")

    #-------------------Combine the two ON-market sources--------------------------
    on_americanlist.columns = on_MLS.columns.to_list()
    # BUG FIX: DataFrame.append was removed in pandas 2.0 — use pd.concat instead
    on_combine_MLS_American = pd.concat([on_MLS, on_americanlist])

    # dedupe by property_key and sale month (year-month only)
    on_combine_MLS_American.sold_on = pd.to_datetime(
        on_combine_MLS_American.sold_on, errors='coerce').dt.strftime('%Y-%m')
    on_MLS_American = on_combine_MLS_American.drop_duplicates(
        ["property_key", "sold_on"]).sort_values("county_data_id")

    #------------------------OFF market in Assessor Records------------------------
    # rows matched by neither source => Off-market deals (never been listed).
    # .copy() so the rename/assignments below operate on owned frames, not views.
    off_market = assessor_americanlist[2][["propertyid", 'situscity_y',
                                           'situsfullstreetaddress_y', "currentsalerecordingdate",
                                           "prevsalerecordingdate"]].copy()
    off_market.columns = ["propertyid", "situscity",
                          "situsfullstreetaddress",
                          "currentsalerecordingdate",
                          "prevsalerecordingdate"]

    # split into one frame per sale date, then stack, so the current and the
    # previous sale each count as a separate off-market deal
    off_current = off_market[["propertyid", "situscity",
                              "situsfullstreetaddress", "currentsalerecordingdate"]].copy()
    off_prev = off_market[["propertyid", "situscity",
                           "situsfullstreetaddress", "prevsalerecordingdate"]].copy()
    # FIX: non-inplace rename on copies avoids SettingWithCopyWarning on slices
    off_current = off_current.rename(columns={'currentsalerecordingdate': 'solddate'})
    off_prev = off_prev.rename(columns={'prevsalerecordingdate': 'solddate'})

    off_current_and_prev = pd.concat([off_current, off_prev]).dropna().sort_values("propertyid")
    # dates are stored as YYYYMMDD; reduce to year-month for monthly counting
    off_current_and_prev['solddate'] = pd.to_datetime(off_current_and_prev['solddate'].astype(str), format='%Y%m%d')
    off_current_and_prev['solddate'] = pd.to_datetime(off_current_and_prev['solddate'], errors='coerce').dt.strftime('%Y-%m')

    return on_MLS, on_americanlist, on_MLS_American, off_market, off_current_and_prev

D. Plots ¶

In [12]:
def show_invested_properties(city_name, mls, assessor_df, my1, my2):
    """Plot monthly counts of On vs. Off market deals for sold invested properties.
    INPUT: 
        - city_name: name of a city 
        - mls: MLS dataset name
        - assessor_df: assessor records dataframe (fed through the cleaning,
          invested-properties, and On_Off_Market pipeline)
        - my1, my2: inclusive year-month bounds for the plot, e.g. "2021-01", "2021-12"
    OUTPUT: 
        - None (displays a stacked bar chart of On vs. Off deals per month)
    """
    # only these columns are needed downstream
    selected_cols = ['propertyid', 'situsfullstreetaddress', 'situscity',
       'ownername1full', 'mailingfullstreetaddress', 'yearbuilt',
       'storiesnbrcode', 'currentsalerecordingdate', 'currentsalesprice',
       'prevsalerecordingdate', 'prevsalesprice']

    assessor_clean = clean_assessor_record(assessor_df, city_name)
    assessor_clean = assessor_clean[selected_cols]

    # apply the invested-properties filter
    assessor_address = invested_properties(assessor_clean)
    assessor_address = assessor_address[selected_cols]
    assessor_address = assessor_address.drop_duplicates(keep="first")

    # classify each sale as On or Off market
    # NOTE(review): relies on the module-level `american_list` global
    df = On_Off_Market(city_name, mls, assessor_address, american_list)

    #--------------------------------------------------------
    # Count On deals per month
    on_df = df[2][["sold_on", "status"]].groupby(["sold_on"]).size().reset_index(name='On')
    # Count Off deals per month
    off_df = df[4][["solddate", "propertyid"]].groupby(["solddate"]).size().reset_index(name='Off')
    on_off_df = off_df.merge(on_df, left_on="solddate", right_on="sold_on", how="outer")

    # BUG FIX: after the outer merge, a month present on only one side has NaN in
    # the other side's date column. The original filtered on `sold_on` (dropping
    # Off-only months) and plotted x from `solddate` (blank for On-only months).
    # Build one unified month column and use it for both.
    on_off_df['month'] = on_off_df['solddate'].fillna(on_off_df['sold_on'])
    on_off_year = on_off_df[(on_off_df.month >= my1) & (on_off_df.month <= my2)].sort_values('month')

    # stacked bar chart: On (blue) vs Off (red) per month
    fig1 = go.Figure(data=[
        go.Bar(name='On', x=on_off_year.month, y=on_off_year.On,
               text=on_off_year.On,
               marker_color='#3283FE',
               texttemplate='%{text:.2s}'),

        go.Bar(name='Off', x=on_off_year.month, y=on_off_year.Off,
               text=on_off_year.Off,
               marker_color='#EF553B',
               texttemplate='%{text:.2s}')
    ])

    fig1.update_layout(title=f' Total Sold Invested Properties in {city_name.capitalize()}.',
                       title_x=0.5,
                       legend_title="Market", yaxis_title='#of Sold Properties',
                       barmode='stack',
                       height=400, width=800,
                       yaxis_range=[0, 1000])
    fig1.show()
    return None
In [13]:
def show_flip_properties(city_name, mls, assessor_df, my1, my2):
    """Plot monthly counts of On vs. Off market deals for sold flip properties.
    INPUT: 
        - city_name: name of a city 
        - mls: MLS dataset name
        - assessor_df: assessor records dataframe (fed through the cleaning,
          invested-properties, flip, and On_Off_Market pipeline)
        - my1, my2: inclusive year-month bounds for the plot, e.g. "2021-01", "2021-12"
    OUTPUT: 
        - None (displays a stacked bar chart of On vs. Off flipped deals per month)
    """
    # only these columns are needed downstream
    selected_cols = ['propertyid', 'situsfullstreetaddress', 'situscity',
       'ownername1full', 'mailingfullstreetaddress', 'yearbuilt',
       'storiesnbrcode', 'currentsalerecordingdate', 'currentsalesprice',
       'prevsalerecordingdate', 'prevsalesprice']

    # apply the cleaning function
    assessor_clean = clean_assessor_record(assessor_df, city_name)
    assessor_clean = assessor_clean[selected_cols]

    # apply the invested-properties filter
    assessor_address = invested_properties(assessor_clean)
    assessor_address = assessor_address[selected_cols]
    assessor_address = assessor_address.drop_duplicates(keep="first")

    # narrow down to flips, then classify each sale as On or Off market
    # NOTE(review): relies on the module-level `american_list` global
    flip = identify_flip(assessor_address)
    df = On_Off_Market(city_name, mls, flip, american_list)

    # Count On deals per month
    on_df = df[2][["sold_on", "status"]].groupby(["sold_on"]).size().reset_index(name='On')
    # Count Off deals per month
    off_df = df[4][["solddate", "propertyid"]].groupby(["solddate"]).size().reset_index(name='Off')
    on_off_df = off_df.merge(on_df, left_on="solddate", right_on="sold_on", how="outer")

    # BUG FIX: after the outer merge, a month present on only one side has NaN in
    # the other side's date column. The original filtered on `sold_on` (dropping
    # Off-only months) and plotted x from `solddate` (blank for On-only months).
    # Build one unified month column and use it for both.
    on_off_df['month'] = on_off_df['solddate'].fillna(on_off_df['sold_on'])
    on_off_year = on_off_df[(on_off_df.month >= my1) & (on_off_df.month <= my2)].sort_values('month')

    # stacked bar chart: On (blue) vs Off (red) per month
    fig2 = go.Figure(data=[
        go.Bar(name='On', x=on_off_year.month, y=on_off_year.On,
               text=on_off_year.On,
               marker_color='#3283FE',
               texttemplate='%{text:.2s}'),

        go.Bar(name='Off', x=on_off_year.month, y=on_off_year.Off,
               text=on_off_year.Off,
               marker_color='#EF553B',
               texttemplate='%{text:.2s}')
    ])

    fig2.update_layout(title=f' Total Sold Flip Properties in {city_name.capitalize()}.',
                       title_x=0.5,
                       legend_title="Market", yaxis_title='#of Sold Properties',
                       barmode='stack',
                       height=400, width=800,
                       yaxis_range=[0, 150])
    fig2.show()
    return None

1. Chicago ¶

In [14]:
show_invested_properties('chicago', chicago_list, assessor_records, "2021-01", "2021-12")
In [15]:
show_flip_properties('chicago', chicago_list, assessor_records, "2021-01", "2021-12")

2. Dallas¶

In [16]:
show_invested_properties('dallas', dallas_list, assessor_records,"2021-01", "2021-12")
In [17]:
show_flip_properties('dallas', dallas_list, assessor_records,"2021-01", "2021-12")

3. Denver¶

In [18]:
show_invested_properties('denver', denver_list, assessor_records, "2021-01", "2021-12")
In [19]:
show_flip_properties('denver', denver_list, assessor_records, "2021-01", "2021-12")